# -*- coding: UTF-8 -*-
import os;
import sys;
import re;
import urllib;
import urllib.request;

# --- URL building blocks for the online judge -------------------------------
# Base address that every request URL starts with.
A_head = "https://www.lydsy.com/JudgeOnline/"
# Submission-status page; query parameters are appended after the "?".
status = "status.php?"
# Problem page prefix; a problem ID is appended to it.
pro_head = "problem.php?id="
# Query fragment that precedes the user name on the status page.
userID_tag = "user_id="
# Query fragment used for the accepted ("AC") listing.
result_AC = "jresult=4"

# User name to crawl; overwritten from the command line when run as a script.
userID = ""

# Headers sent with every request (a desktop-browser User-Agent string).
explore_headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'
    ),
}

# --- Crawl results -----------------------------------------------------------
# Maps a problem ID to the title text read from its problem page.
ProID_ProName = {}
# Problem IDs in the order they were found on the status pages.
ProID_List = []
def Find_now_page_ProID(pagecontext):
    """Collect the link text of every problem link found in *pagecontext*.

    A matching link looks like ``<a href='problem.php?id=2038'>2038</a>``.
    For each match, the text between the opening tag and the closing tag
    (``2038`` in the example) is appended to the module-level ``ProID_List``.
    Nothing is returned.
    """
    link_pattern = re.compile("<a href='problem.*?'>.*?</a>")
    for anchor in link_pattern.findall(pagecontext):
        # Splitting on angle brackets turns the example above into
        # ['', "a href='problem.php?id=2038'", '2038', '/a', ''];
        # index 2 is the text between the 2nd and the 3rd bracket.
        ProID_List.append(re.split("[<>]", anchor)[2])


def Find_next_page(pagecontext):
    """Extract the paging parameters behind the "Next Page" link.

    The link target is expected to end in two numbers, as in
    ``...&top=628405&prevtop=767346``; the first is returned as ``top`` and
    the second as ``prevtop``.

    The raw matches, the link target, its length and the parsed pair are
    printed to stdout as progress/debug output.

    Args:
        pagecontext: HTML text of a status page.

    Returns:
        ``[top, prevtop]`` (two digit strings) when a further page can be
        requested, or the one-element list ``["NO_RESPONSE"]`` when the
        traversal has to stop: the link is missing, its target does not end
        in two numbers, or ``top == prevtop`` (which means the listing has
        been fully traversed).
    """
    ERROR_meeeage = "NO_RESPONSE"
    link_suffix = ">Next Page</a>"

    Nextpage = re.findall("<a href=.*?" + link_suffix, pagecontext)
    print(Nextpage)
    if not Nextpage:
        # A page without the link (error page, changed layout) cannot be
        # paged any further; report "stop" instead of failing on Nextpage[0].
        return [ERROR_meeeage]

    # Drop the trailing ">Next Page</a>" so the text ends with the URL itself.
    top_prevtop = Nextpage[0][:-len(link_suffix)]
    print(top_prevtop)
    print(len(top_prevtop))

    # Anchor at the end of the text: a lazy match may start at an earlier
    # <a href=...> on the same line, but the last two numbers always belong
    # to the "Next Page" link.
    numbers = re.search(r"(\d+)\D+(\d+)$", top_prevtop)
    if numbers is None:
        return [ERROR_meeeage]
    top, prevtop = numbers.groups()
    print(top, prevtop, end=' \n')

    if top == prevtop:
        return [ERROR_meeeage]
    return [top, prevtop]

def get_ProName(pagecontext):
    """Return the text that follows the first ``<title>`` tag.

    The name runs from just after ``<title>`` up to (not including) the next
    ``<`` character, or to the end of the text if no ``<`` follows.

    Args:
        pagecontext: HTML text of a problem page.

    Returns:
        The title text, or ``""`` when the page contains no ``<title>`` tag.
    """
    _, tag, after_tag = pagecontext.partition("<title>")
    if not tag:
        return ""
    # Only the first <title> counts; later occurrences must not be appended
    # to the name.
    return after_tag.partition("<")[0]


def _fetch_page(url):
    """Download *url* with the shared request headers and return it as text."""
    request = urllib.request.Request(url=url, headers=explore_headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode("UTF-8")


if __name__ == '__main__':
    # Usage: <script> <user_id>
    # 1. Walk the user's accepted-submission listing page by page and collect
    #    the problem IDs.
    # 2. Write the IDs to ProID.txt.
    # 3. Fetch each problem page, read its title and write "id : title"
    #    lines to ProID_ProName<user_id>_BZOJ.txt.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit("usage: " + sys.argv[0] + " <user_id>")

    ProID_ProName.clear()
    userID = sys.argv[1]

    # First page of the listing; later pages add "&top=...&prevtop=...".
    URL_address = A_head + status + userID_tag + userID + "&" + result_AC
    print(URL_address)
    pagecontext = _fetch_page(URL_address)

    # Paging on the accepted listing (original author's note): "top" is the
    # run ID of the latest submission on the current page and "prevtop" the
    # latest run ID on the next page, e.g.
    # https://www.lydsy.com/JudgeOnline/status.php?user_id=dog1889&jresult=4&top=628405&prevtop=767346
    # When top == prevtop the traversal is finished; Find_next_page signals
    # that by returning a one-element list.
    while True:
        Find_now_page_ProID(pagecontext)
        analysis = Find_next_page(pagecontext)
        if len(analysis) == 1:
            break
        top, prevtop = analysis
        NOW_URL = URL_address + "&top=" + top + "&prevtop=" + prevtop
        print(NOW_URL)
        pagecontext = _fetch_page(NOW_URL)

    # Explicit encoding keeps this file consistent with the one written below.
    with open("ProID.txt", "w", encoding="UTF-8") as id_file:
        id_file.writelines(pro_id + "\n" for pro_id in ProID_List)

    for pro_id in ProID_List:
        # A problem accepted several times appears several times in the
        # listing; download its page only once.
        if pro_id in ProID_ProName:
            continue
        Pro_Name = get_ProName(_fetch_page(A_head + pro_head + pro_id))
        print(Pro_Name)
        ProID_ProName[pro_id] = Pro_Name

    with open("ProID_ProName" + userID + "_BZOJ.txt", "w", encoding="UTF-8") as name_file:
        name_file.write("AC:" + str(len(ProID_ProName)) + "\n")
        for pro_id, pro_name in ProID_ProName.items():
            name_file.write(pro_id + " : " + pro_name + "\n")

